#!/usr/bin/python
# -*- coding:utf-8 -*- 
import urllib2
import httplib
import re
import webbrowser
from sgmllib import SGMLParser
import time
import os
import logging as lg
# Route every log record (DEBUG and above) to "log.txt" in the directory the
# script is started from, using a "timestamp - LEVEL: message" layout.
lg.basicConfig(
    filename=os.path.join(os.getcwd(), 'log.txt'),
    level=lg.DEBUG,
    format='%(asctime)s - %(levelname)s: %(message)s',
)
 
class parserHREF(SGMLParser):
	def __init__(self):
		SGMLParser.__init__(self)
		self.is_a = ""
		self.href_value=""
		self.target =""
	def start_a(self, attrs):
	    if len(attrs)==1 and attrs[0][0]=="href":
	        self.is_a=1
	        self.href_value=attrs[0][1]              
	def end_a(self):
		self.is_a = ""
	def handle_data(self, text):
		if self.is_a == 1 and re.findall(time.strftime("%Y%m%d", time.localtime()),text):
		#'''time.strftime("%Y%m%d", time.localtime())'''
		    self.target=self.href_value
		    
class parserContent(SGMLParser):
	def __init__(self):
		SGMLParser.__init__(self)
		self.is_table1 = ""
		self.is_table2=""
		self.content =""
	def start_body(self, attrs):	   
	    #if len(attrs) >= 1 and attrs[0][0]=="class" and attrs[0][1]=="oblog_t_2":
	        lg.info(attrs)
	        self.is_table1=1              
	def end_body(self):
		self.is_table2 = 1
	def handle_data(self, text):
		if self.is_table1 == 1 and self.is_table2 == 1:
		    self.content=text
		    self.is_table1=""
		    self.is_table2=""
		    
# ---------------------------------------------------------------------------
# Script body: locate today's post on the blog index page, download it, strip
# right-floated spans and images from the article table, and save the result
# as a local HTML file.
# ---------------------------------------------------------------------------

def _fetch(address):
    """Return the raw body at ``address``, always closing the HTTP response."""
    response = urllib2.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


# All patterns are compiled with re.S so that "." also matches line breaks.
# Greedy match from the article's layout table up to the LAST
# '</P>\r\n<DIV style="WIDOWS: 2' marker on the page.
_ARTICLE_RE = re.compile(r'<TABLE style="TABLE-LAYOUT.*</P>\r\n<DIV style="WIDOWS: 2', re.S)
# Right-floated <SPAN> blocks, removed from the saved copy.
_FLOAT_SPAN_RE = re.compile(r'<SPAN style="FLOAT: right.*?</SPAN>', re.S)
# Image tags, replaced by a textual placeholder.
_IMG_RE = re.compile(r'<IMG.*?>', re.S)

linkaddr = parserHREF()
url = "http://www.dapenti.com/blog/blog.asp?subjectid=70&name=xilei"

page = _fetch(url)
linkaddr.feed(page)

if linkaddr.target != "":
    post_url = "http://www.dapenti.com/blog/" + linkaddr.target
    page = _fetch(post_url)

    article = _ARTICLE_RE.search(page)
    if article is None:
        # Previously this surfaced as a bare IndexError from findall(...)[0];
        # report what actually went wrong and still exit with a failure code.
        lg.error("article markup not found in %s", post_url)
        raise SystemExit(1)

    # The match stops inside an open <DIV style="WIDOWS: 2 ...> tag; the suffix
    # closes that tag and the enclosing elements.
    # NOTE(review): "<TR>" in the suffix looks like it was meant to be "</TR>"
    # -- left unchanged, confirm against the page markup.
    ct = article.group(0) + "\">============END========</DIV></SPAN></TD><TR></TBODY></TABLE>"

    ct = _FLOAT_SPAN_RE.sub("", ct)
    ct = _IMG_RE.sub("<strong>IMAGE IGNORED</strong>\r\n", ct)

    # "with" guarantees the file is closed even if the write fails.
    with open("/home/g5tf87/scripts/tugu.html", "w") as fd:
        fd.write(ct)
else:
    lg.info("no link dated today found on %s", url)



